Including Plots
library(randomForest)
## randomForest 4.7-1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
dat = CancerData.clean1
A = dat$`Treatment 1`
A = as.factor(A)
H = dat[,2:8]
H$`Def Loc Rx?` = as.numeric(H$`Def Loc Rx?`=="Yes")
H$Strat = as.numeric(H$Strat == "H")
# [1] "Age at Reg" "Def Loc Rx?"
# [3] "Time, Horm Rx to Reg (mo)" "Strat"
# [5] "Bsln PSA" "Alk Phos"
# [7] "Hgb"
colnames(H) = paste("x",1:7,sep = "")
Y1 = dat$T1
Y2 = dat$E1
Y1 = (Y1=="No ET") * 0.5
Y2 = (Y2=="S") * 0.2
# Y1 = (Y1=="No ET")
# Y2 = (Y2=="S")
Overall.Y = cbind(Y1,Y2)
class.A = sort(unique(A))
n = length(A)
k<-K<-length(class.A)
m<-N<-length(A)
label0 = paste(c("A","B","C","D"),":",class.A,sep="")
### LM with interaction
mus1.reg <- Reg.mu(Y = Y1, As = A, H = H)$mus.reg
mus2.reg <- Reg.mu(Y = Y2, As = A, H = H)$mus.reg
mu1.hat = mus1.reg
mu2.hat = mus2.reg
textsize = 12
### random forest
# RF1<-randomForest(Y1 ~., data=data.frame(A,H))
# RF2<-randomForest(Y2 ~., data=data.frame(A,H))
#
# mus1.reg<-matrix(NA,n,length(class.A))
# for(i in 1L:length(class.A)){
# mus1.reg[,i]<-predict(RF1,newdata=data.frame(A=rep(class.A[i],n),H))
# }
#
# mus2.reg<-matrix(NA,n,length(class.A))
# for(i in 1L:length(class.A)){
# mus2.reg[,i]<-predict(RF2,newdata=data.frame(A=rep(class.A[i],n),H))
# }
#
# mu1.hat = mus1.reg
# mu2.hat = mus2.reg
#
# for(ii in 1:5){
# plot(mu1.hat[ii,],mu2.hat[ii,])
# text(mu1.hat[ii,],mu2.hat[ii,],as.character(class.A))
# }
### BART
# library(BayesTree)
# pred_X = rbind(data.frame(A=rep(class.A[1],n),H),
# data.frame(A=rep(class.A[2],n),H),
# data.frame(A=rep(class.A[3],n),H),
# data.frame(A=rep(class.A[4],n),H))
#
#
# pred_T<- bart(
# as.data.frame(cbind(A,H)),as.numeric(Y1),
# x.test = pred_X)
# muhat.vals <- colMeans(pred_T$yhat.test)
# mus1.reg <- matrix(muhat.vals, nrow=n,ncol=length(class.A))
#
# pred_T<- bart(
# as.data.frame(cbind(A,H)),as.numeric(Y2),
# x.test = pred_X)
# muhat.vals <- colMeans(pred_T$yhat.test)
# mus2.reg <- matrix(muhat.vals, nrow=n,ncol=length(class.A))
#
# mu1.hat = mus1.reg
# mu2.hat = mus2.reg
### AIPTW
# source("~/Desktop/Multi-Outcome Tree/Yao file/MOTRL package/R/M.propen.R")
# source("~/Desktop/Multi-Outcome Tree/Yao file/MOTRL package/R/Reg.mu.R")
# library(nnet)
#
# pis.hat <- M.propen(A = A,Xs = H)
# pis.hat[pis.hat<0.05] = 0.05
#AIPW estimates
# mus.a1<-matrix(NA,N,K)
# for(k in 1L:K){
# mus.a1[,k] <- (A==class.A[k])*Y1/pis.hat[,k]+(1-(A==class.A[k])/pis.hat[,k])*mus1.reg[,k]
# }
#
# #AIPW estimates
# mus.a2<-matrix(NA,N,K)
# for(k in 1L:K){
# mus.a2[,k] <- (A==class.A[k])*Y2/pis.hat[,k]+(1-(A==class.A[k])/pis.hat[,k])*mus2.reg[,k]
# }
# mu1.hat = mus.a1
# mu2.hat = mus.a2
w0 = seq(0,1,0.05)
# treeout = DTRtree(mus.hat = list(mu1.hat,mu2.hat),
# A,H,pis.hat=NULL,
# mus.reg=NULL,depth=3,
# minsplit=20,w_vec = cbind(w0,1-w0),weight_combine = "mean",lambda = 0.001)
treeout = DTRtree(mus.hat = list(mu1.hat,mu2.hat),
A,H,pis.hat=NULL,
mus.reg=NULL,depth=3,
minsplit=20,w_vec = cbind(w0,1-w0),weight_combine = "max",lambda = 0.01)
treeout
## node X cutoff mEy group
## 1 1 7 13.0 0.02578032 NA
## 2 2 2 0.0 0.03254700 NA
## 3 3 3 30.1 0.01971496 NA
## 4 4 NA NA NA 4
## 5 5 NA NA NA 5
## 6 6 NA NA NA 6
## 7 7 NA NA NA 7
## 8 8 NA NA NA NA
## 9 9 NA NA NA NA
## 10 10 NA NA NA NA
## 11 11 NA NA NA NA
## 12 12 NA NA NA NA
## 13 13 NA NA NA NA
## 14 14 NA NA NA NA
## 15 15 NA NA NA NA
newdata = H
(leaf_pred = predict_leaf.DTR(treeout, H))
## [1] 4 6 4 7 6 6 6 6 6 6 7 7 4 7 7 7 6 6 6 5 5 6 5 4 5 7 5 5 4 5 7 5 7 5 5 7 7
## [38] 5 4 7 7 4 4 5 5 7 4 6 7 6 5 6 4 5 6 4 5 6 7 6 4 6 5 4 6 7 5 7 5 6 6 5 5 4
## [75] 6 4 5 7 7 7 7 7 5 6 7 5 6 4 5 5 4 4 5 6 7 5 4 4 5 6 6 6 5 6 5 6 5 7 5 7 7
## [112] 6 4 6 7 5 4 6 7 5 4 7 4 5 7 4 5 7 4 7 6 6 6 7 5 7 5 4 7 6 5 7 5 7
table(leaf_pred)
## leaf_pred
## 4 5 6 7
## 27 41 37 39
leaf_n = table(leaf_pred)
tree0 = vis_tree(max_h = 3,treeout,number = table(leaf_pred))
plot(tree0)
Utility visualization
library(ggpubr)
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:randomForest':
##
## combine
## The following object is masked from 'package:dplyr':
##
## combine
library("cowplot")
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
library(grid)
p_list = list()
leaf_vec = sort(unique(leaf_pred))
n_leaf = length(leaf_vec)
n_record = rep(0,n_leaf)
for(ii in 1:n_leaf){
chosen = which(leaf_pred==leaf_vec[ii])
(p_list[[ii]] = my_vis(mu1.hat,mu2.hat,chosen,k=4,label=label0,lwd=1.5))
n_record[ii] = sum(chosen)
}
# 2. Save the legend
legend <- get_legend(p_list[[1]])
# 3. Remove the legend from the box plot
for(ii in 1:n_leaf){
p_list[[ii]] <- p_list[[ii]] + theme(legend.position="none") +
ggtitle(paste("Leaf Node",ii,", N=",leaf_n[ii]))
}
# 4. Create a blank plot
blankPlot <- ggplot()+geom_blank(aes(1,1)) +
cowplot::theme_nothing()
# pdf(file = "~/Desktop/Multi_Outcome/cluster/LZ_compare/LZ_assumption.pdf",
# width = 17.4 / 2.54*1.5, # The width of the plot in inches
# height = 17.4 / 2.54 * 0.75) # The height of the plot in inches
grid.arrange(p_list[[1]],p_list[[3]],
legend,p_list[[2]],p_list[[4]], legend, ncol=3,
widths=c(5, 5, 2),
top=textGrob("Model Performance Under Different Weights",
gp=gpar(fontsize=18,fontface="bold")))

Point Visualization
p_list2 = list()
p_list3 = list()
leaf_vec = sort(unique(leaf_pred))
n_leaf = length(leaf_vec)
for(ii in 1:n_leaf){
chosen = which(leaf_pred==leaf_vec[ii])
(p_list2[[ii]] = my_vis_elipse(mu1.hat,mu2.hat,chosen,
k=4,label=label0,lwd=3))
(p_list3[[ii]] = my_vis_mean(mu1.hat,mu2.hat,chosen,
k=4,label=label0,lwd=3))
}
# + xlim(-0.3,0.25) + ylim(-0.2,0.3)
plot4 = function(p_list){
# 2. Save the legend
legend <- get_legend(p_list[[1]])
# 3. Remove the legend from the box plot
for(ii in 1:n_leaf){
p_list[[ii]] <- p_list[[ii]] + theme(legend.position="none") +
ggtitle(paste("Leaf Node",leaf_vec[ii],", N=",leaf_n[ii]))
}
p1 <- p_list[[1]] + theme(legend.position="none")
p2 <- p_list[[2]] + theme(legend.position="none") + xlim(-0.3,0.25) + ylim(-0.2,0.3)
p3 <- p_list[[3]] + theme(legend.position="none")
p4 <- p_list[[4]] + theme(legend.position="none")
# 4. Create a blank plot
blankPlot <- ggplot()+geom_blank(aes(1,1)) +
cowplot::theme_nothing()
# pdf(file = "~/Desktop/Multi_Outcome/cluster/LZ_compare/LZ_assumption.pdf",
# width = 17.4 / 2.54*1.5, # The width of the plot in inches
# height = 17.4 / 2.54 * 0.75) # The height of the plot in inches
p = grid.arrange(p1,p3,
legend,p2,p4, legend, ncol=3,
widths=c(5, 5, 2),
top=textGrob("Different Leaf Node's Shape",
gp=gpar(fontsize=18,fontface="bold")))
p
}
# triangle
plot_grid(plot4(p_list = p_list2))
## Warning: Removed 2 rows containing missing values (`geom_point()`).


plot_grid(plot4(p_list = p_list3))


Pairwise Distance
check_pair = function(i,j){
chosen1 = (leaf_pred==leaf_vec[i])
chosen2 = (leaf_pred==leaf_vec[j])
(p = my_vis_pair(mu1.hat,mu2.hat,chosen1,chosen2,k=4,label=label0,lwd=1.5)+
ggtitle(paste("Group",i,"v.s.","Group",j)))
p
}
q12=check_pair(1,2)+ theme(legend.position="none")+ ylab("Distance")
q13=check_pair(1,3)+ theme(legend.position="none")+ ylab("Distance")
q14=check_pair(1,4)+ theme(legend.position="none")+ ylab("Distance")
q23=check_pair(2,3)+ theme(legend.position="none")+ ylab("Distance")
q24=check_pair(2,4)+ theme(legend.position="none")+ ylab("Distance")
q34=check_pair(3,4)+ theme(legend.position="none")+ ylab("Distance")
Pairwise Mean
library(ggpattern)
require("magick")
## Loading required package: magick
## Linking to ImageMagick 6.9.12.3
## Enabled features: cairo, fontconfig, freetype, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fftw, ghostscript, x11
library(magick)
label1 = c("A","B","C","D")
i = 1
j = 3
chosen1 = which(leaf_pred==leaf_vec[i])
chosen2 = which(leaf_pred==leaf_vec[j])
(p = my_vis_mean_pair(mu1.hat,mu2.hat,chosen1,chosen2,i,j,label1,k,point_size=5))

p2 = my_vis_all_pair(mu1.hat,mu2.hat,leaf_pred,label1,k,alpha = 1)
# 2. Save the legend
Total_legend <- get_legend(p2)
generate_mean_pair = function(i,j){
chosen1 = which(leaf_pred==leaf_vec[i])
chosen2 = which(leaf_pred==leaf_vec[j])
(p = my_vis_mean_pair(mu1.hat,mu2.hat,chosen1,chosen2,i,j,label1,k,point_size=5)+
ggtitle(paste("Group",i,"v.s.","Group",j)))
return(p)
}
p12 = generate_mean_pair(1,2)+ theme(legend.position="none")
p13 = generate_mean_pair(1,3)+ theme(legend.position="none")
p14 = generate_mean_pair(1,4)+ theme(legend.position="none")
p23 = generate_mean_pair(2,3)+ theme(legend.position="none")
p24 = generate_mean_pair(2,4)+ theme(legend.position="none")
p34 = generate_mean_pair(3,4)+ theme(legend.position="none")
library(ggplot2)
library(gridExtra)
library(grid)
p = grid.arrange(p_list[[1]],p12,p13,p14,
q12,p_list[[2]],p23,p24,
q13,q23,p_list[[3]],p34,
q14,q24,q34,p_list[[4]], ncol=4,
widths=c(5, 5, 5,5),
top=textGrob("Model Performance Under Different Weights",
gp=gpar(fontsize=18,fontface="bold")))

p <- plot_grid( p,Total_legend, ncol = 1, rel_heights = c(1, .2))
pdf(file = "diagnoal plot.pdf", # The directory you want to save the file in
width = 12, # The width of the plot in inches
height = 9) # The height of the plot in inches
# Step 2: Create the plot with R code
p
# Step 3: Run dev.off() to create the file!
dev.off()
## quartz_off_screen
## 2